# Reverse-engineering the brain
# Fetch the MNIST dataset (OpenML data set id 554).
mnist_data <- getOMLDataSet(did = 554)$data
# Draw the first 100 digits in a 10x10 grid with no margins.
par(mfrow = c(10, 10), mai = c(0, 0, 0, 0))
for (row_idx in seq_len(100)) {
  # Reshape the 784 pixel columns of one row into a 28x28 image matrix.
  digit <- matrix(as.matrix(mnist_data[row_idx, 1:784]), nrow = 28)
  # Flip the columns so the digit is upright; plot in inverted grayscale.
  image(digit[, nrow(digit):1], axes = FALSE, col = gray(255:0 / 255))
}

# Plot the response of a hard-threshold perceptron: a step function that
# switches from 0 to 1 at the activation boundary (here, at x = 0).
# NOTE(review): the original code called `perceptron()` here, but no such
# function exists at this point -- a learner OBJECT named `perceptron` is
# only created further down -- so the call would error and was removed.
px <- seq(-6, 6, length.out = 101)
plot(px, px >= 0, type = "l", xlab = "", ylab = "",
     main = "Perceptron response")
# Restrict MNIST to the digits 0 and 1, giving a binary problem; drop the
# now-empty factor levels of the other eight classes.
mnist_bin <- droplevels(subset(mnist_data, class %in% c(0, 1)))
# Scatter plot of two arbitrary pixels, colored by digit class.
ggplot(data = mnist_bin, aes(x = pixel217, y = pixel518, color = class)) +
  geom_point()
# Wrap the binary data as an mlr classification task.
mnist_task <- makeClassifTask(data = mnist_bin, target = "class")
# Mimic a perceptron with a neural net that has a single hidden node.
# Strictly speaking nnet gives a sigmoid perceptron, but it is quite close.
perceptron <- makeLearner(
  "classif.nnet",
  par.vals = list(size = 1, trace = FALSE)
)
# Show the learner's decision boundary over the two chosen pixels.
plotLearnerPrediction(perceptron, mnist_task,
                      features = c("pixel217", "pixel518")) +
  theme_cowplot()
# --- Gradient descent on a 1-d quadratic --------------------------------
ws <- seq(0, 4, len = 20)  # grid of w values, used only for plotting

# Objective to minimize: a parabola with its minimum at w = 2.
f <- function(w) {
  (w - 2)^2
}
plot(ws, f(ws), type = "l")  # plot the target function

# First gradient step computed by hand (the grad() formula is given below).
w <- c(0.1, 0.1 - 0.1 * 2 * (0.1 - 2))
lines(w, f(w), type = "b", col = "blue")

# Analytic gradient df/dw of f(w) = (w - 2)^2.
grad <- function(w) {
  2 * (w - 2)
}

w <- 0.1             # initialize (first guess)
learningRate <- 0.1  # step size; try smaller and larger values
n_steps <- 100       # number of gradient descent updates

# Preallocate the traces (starting point + one entry per update) instead of
# growing them with c() inside the loop, which copies the whole vector on
# every iteration (O(n^2) overall).
wtrace <- numeric(n_steps + 1)
ftrace <- numeric(n_steps + 1)
wtrace[1] <- w       # store the initial x-value
ftrace[1] <- f(w)    # store the initial y-value

for (step in seq_len(n_steps)) {
  w <- w - learningRate * grad(w)  # gradient descent update
  wtrace[step + 1] <- w            # store next x-value
  ftrace[step + 1] <- f(w)         # store next y-value
}

plot(ws, f(ws), type = "l")                      # plot target
lines(wtrace, ftrace, type = "b", col = "blue")  # plot descent steps
# Refit the one-node net with trace = TRUE, so nnet prints its
# optimization progress (the gradient descent trace) to the console.
perceptron <- makeLearner(
  "classif.nnet",
  par.vals = list(size = 1, trace = TRUE)
)
plotLearnerPrediction(perceptron, mnist_task,
                      features = c("pixel217", "pixel518")) +
  theme_cowplot()
# weights: 5 initial value 10224.006537 iter 10 value 5232.791152 iter 20 value 5212.206316 iter 30 value 5124.183676 iter 40 value 5113.415955 iter 50 value 5109.301886 iter 60 value 5021.879675 final value 5021.799567 converged # weights: 5 initial value 11500.276477 iter 10 value 4851.417742 iter 20 value 4774.501144 iter 20 value 4774.501122 iter 20 value 4774.501122 final value 4774.501122 converged # weights: 5 initial value 9746.191685 iter 10 value 4534.410567 iter 20 value 4428.864166 iter 30 value 4279.029180 iter 40 value 4275.725179 final value 4275.724186 converged # weights: 5 initial value 9738.423876 iter 10 value 7237.066693 iter 20 value 4681.590380 iter 30 value 4573.803819 iter 40 value 4568.382011 final value 4568.295740 converged # weights: 5 initial value 9380.506381 iter 10 value 4603.716474 iter 20 value 4594.770100 final value 4594.593781 converged # weights: 5 initial value 9491.839586 iter 10 value 6216.765091 iter 20 value 4572.094083 iter 30 value 4518.575933 iter 40 value 4481.482937 iter 50 value 4384.528411 iter 60 value 4312.683587 iter 70 value 4253.134229 final value 4249.412988 converged # weights: 5 initial value 9181.592287 iter 10 value 4533.939988 iter 20 value 4530.258527 final value 4530.189631 converged # weights: 5 initial value 9571.938604 iter 10 value 4947.960969 iter 20 value 4603.497357 iter 30 value 4576.344842 iter 40 value 4560.221413 iter 40 value 4560.221409 final value 4560.221409 converged # weights: 5 initial value 9462.336988 iter 10 value 5753.417466 final value 4850.623988 converged # weights: 5 initial value 9305.662774 iter 10 value 4646.948835 iter 20 value 4642.563847 iter 30 value 4640.294298 iter 40 value 4570.690520 iter 50 value 4530.658483 iter 60 value 4525.486476 iter 70 value 4510.012578 iter 80 value 4508.865563 final value 4508.848658 converged # weights: 5 initial value 9574.994267 iter 10 value 4894.721150 iter 20 value 4706.184664 iter 30 value 4559.412772 iter 40 value 4558.326991 final 
# value 4557.900313 converged
# Plot the sigmoid activation over the same x grid used for the step
# function above (sigmoid() is provided by an attached package).
plot(
  px, sigmoid(px, a = 1),
  type = "l", xlab = "", ylab = "", main = "Sigmoid"
)
# A proper multi-layer perceptron: 50 hidden units instead of one.
mlp <- makeLearner("classif.nnet", par.vals = list(size = 50, trace = FALSE))
plotLearnerPrediction(mlp, mnist_task,
                      features = c("pixel217", "pixel518")) +
  theme_cowplot()
# Thanks to Bernd Bischl and Tobias Glasmachers for useful input.